* Session settings. Note that ", perm" makes -more off- persist across sessions.
set more off, perm
set maxvar 120000
set varabbrev off
* ---------------------------------------------- *
* The project root is the first argument passed to this do-file.
* Stop immediately when it is missing: every path global below would otherwise
* resolve to a directory directly under the filesystem root (/Data, /Temp, ...),
* and the clean-up at the end of this script deletes files under $Temp.
if `"`1'"' == "" {
	display as error "project directory argument is required: do <this do-file> <project dir>"
	exit 198
}
global dir 	"`1'"
global Data 	$dir/Data
global Tables 	$dir/Tables
global Figures 	$dir/Figures
global Work 	$dir/Work
global Temp 	$dir/Temp
global Pseudo	$dir/Work/Temp
* ---------------------------------------------- *
* Load the CRSP stocknames file and prepare it as the using-side of the joins below
import sas using $Data/CRSP/stocknames.sas7bdat, clear case(lower)
order *, sequential
order comnam permco permno ticker cusip ncusip shrcd shrcls siccd exchcd hexcd namedt nameenddt st_date end_date
format namedt nameenddt st_date end_date %td

* Sample restriction: share codes 10/11 and exchange codes 1 through 3
* (common stock trading on major exchanges)
keep if inlist(shrcd, 10, 11) & inrange(exchcd, 1, 3)

* Snapshot of the filtered file, taken before the ticker/name clean-up below
* NOTE(review): this tempfile is not referenced again in the visible script - confirm it is still needed
tempfile stocknames
save "`stocknames'", replace

* CRSP ticker: rename so it cannot collide with the IBES ticker, then trim blanks
rename  ticker tickercrsp
replace tickercrsp=trim(tickercrsp)

* Squeezed company name: trimmed, then blanks, periods and commas removed
replace  comnam=trim(comnam)
generate comnam_s=subinstr(comnam," ","",.)
foreach ch in "." "," {
	replace comnam_s=subinstr(comnam_s,"`ch'","",.)
}

* note: For all stocks still trading, the last "nameenddt" is 12/29/2023.
save $Temp/crsp, replace
/* ********************************************************************* */
* Load the IBES summary file and reshape it to one row per
* (cusip, ticker, oftic, cname, statpers) with medest/fpedats spread over fpi
import sas using $Data/IBES/statsumu_epsus.sas7bdat, case(lower) clear

* EPS measure only, and no forecast-period indicator containing A, B, C or D
keep if strpos(measure,"EPS")>0 & !(strpos(fpi,"A") | strpos(fpi,"B") | strpos(fpi,"C") | strpos(fpi,"D"))
* fpi becomes numeric so it can serve as the reshape j() index
destring fpi, replace

* US firms with estimates in USD
keep if usfirm==1 & strpos(curcode,"USD")>0

keep medest fpedats statpers fpi cusip ticker oftic cname 
recast str cname
duplicates drop

reshape wide medest fpedats, i(cusip ticker oftic cname statpers) j(fpi)
order *, sequential
order statpers 
drop fpedats0

* Squeezed company name, built the same way as comnam_s on the CRSP side
replace  cname=trim(cname)
generate cname_s=subinstr(cname," ","",.)
foreach ch in "." "," {
	replace cname_s=subinstr(cname_s,"`ch'","",.)
}

* Suffix the IBES identifiers so they stay distinct from the CRSP ones after a join
rename ticker tickeribes
rename cusip  cusipibes
save $Temp/ibes, replace
/* ********************************************************************* */
* Part 1 (score 1): IBES cusip equals the CRSP historical cusip (ncusip)
* (cusips are not reused but may change, e.g. mergers)
use if cusipibes!="" using $Temp/ibes, clear
generate ncusip=cusipibes
joinby ncusip using $Temp/crsp

* the IBES statistic date must fall inside the CRSP name record's date range
drop if (statpers<namedt) | (statpers>nameenddt)
/* ************************** */
generate score=1

save $Temp/part1, replace
/* ********************************************************************* */
* Part 2 (scores 2-3): observations that do not match on historical cusip;
* IBES cusip equals the CRSP cusip field instead, and the ticker decides the score
use if cusipibes!="" using $Temp/ibes, clear

generate cusip=cusipibes
joinby cusip using $Temp/crsp
drop if (statpers<namedt) | (statpers>nameenddt)

* rows whose ncusip also equals the IBES cusip are already in part1.dta
drop if ncusip==cusipibes

* score 2 when the CRSP ticker equals the IBES official ticker, 3 otherwise
generate score=cond(tickercrsp==oftic,2,3)

save $Temp/part2, replace
/* ********************************************************************* */
* Part 3 (score 4): observations that do not match on cusip;
* the squeezed company names are identical
use if cname_s!="" using $Temp/ibes, clear
generate comnam_s=cname_s

joinby comnam_s using $Temp/crsp
drop if (statpers<namedt) | (statpers>nameenddt)

* keep only rows where neither CRSP cusip field equals the IBES cusip
* (those are covered by part1.dta or part2.dta)
keep if cusipibes!=cusip & cusipibes!=ncusip

generate score=4

save $Temp/part3, replace
/* ********************************************************************* */
* Part 4 (scores 5-6): observations that match on neither cusip nor exact name;
* the IBES official ticker equals the CRSP ticker and the names are close
use if oftic!="" using $Temp/ibes, clear
generate tickercrsp=oftic

joinby tickercrsp using $Temp/crsp
drop if (statpers<namedt) | (statpers>nameenddt)

* keep only rows not covered by part1.dta, part2.dta or part3.dta
keep if cusipibes!=cusip & cusipibes!=ncusip & comnam_s!=cname_s

* Strip one trailing legal-form suffix from each squeezed name, then remove hyphens.
* The length is captured once, before any stripping, so after the first suffix
* that matches the remaining checks look past the end of the shortened string
* and cannot match: at most one suffix is removed per name.
foreach nm of varlist cname_s comnam_s {
	generate orig_len=strlen(`nm')
	foreach sfx in INC LTD CORP CO {
		local k=strlen("`sfx'")
		replace `nm'=substr(`nm',1,orig_len-`k') if substr(`nm',orig_len-`k'+1,orig_len)=="`sfx'"
	}
	replace `nm'=subinstr(`nm',"-","",.)
	replace `nm'=strltrim(`nm')
	drop orig_len
}

* String distance between the cleaned names; keep pairs with distance below 5
* NOTE(review): ustrdist is not defined in this file - presumably a community-contributed command that must be installed; confirm
ustrdist cname_s comnam_s, gen(dist)
keep if dist<5 

* score 5 when the first six characters of either CRSP cusip field equal those of the IBES cusip, 6 otherwise
generate score=cond((substr(cusip,1,6)==substr(cusipibes,1,6)) | (substr(ncusip,1,6)==substr(cusipibes,1,6)),5,6)
save $Temp/part4, replace
/* ********************************************************************* */
* Stack the four candidate sets (scores 1-6, lower is better) and reduce them to
* one row per IBES ticker/date and then one row per permno/permco/date
use $Temp/part1, clear
append using $Temp/part2
append using $Temp/part3
append using $Temp/part4
order score comnam cname statpers *cusip* ticker* oftic permco permno
* ***********************
* keep best match if duplicates for IBES ticker
duplicates tag tickeribes statpers, gen(dup)
tabulate dup
* Sorting on score alone leaves the order of equal-score candidates arbitrary, so
* the surviving row could change from run to run. Break ties explicitly: smaller
* name distance first (dist is missing, hence equal, for scores 1-4), then the
* identifiers; -stable- fixes the order of anything still tied.
sort tickeribes statpers score dist permno permco cusipibes oftic cname, stable
by tickeribes statpers: gen obs=_n
tabulate obs if dup>0

drop if dup>0 & obs>1
drop dup obs
* ***********************
* keep best match if duplicates for permco/permno
duplicates tag permno permco statpers, gen(dup)
tabulate dup
* same deterministic tie-breaking as above
sort permno permco statpers score dist tickeribes cusipibes oftic cname, stable
by permno permco statpers: gen obs=_n
drop if dup>0 & obs>1
drop dup obs

rename tickeribes ticker 

* the forecast columns and the name distance are not part of the link table
drop fpedats* medest* dist
order comnam cname statpers ticker oftic tickercrsp permco permno cusip ncusip 
sort permco permno statpers

* drop the squeezed names and the CRSP name-record fields
drop c*_s namedt nameenddt hexcd exchcd siccd shrcd shrcls st_date end_date namedum 
* ***********************
* add back CRSP info
count
tempfile link
save "`link'", replace
* ***********************
import sas using $Data/CRSP/stocknames.sas7bdat, clear case(lower)
* restrict sample to common stock trading on major exchanges
keep if (shrcd==10 | shrcd==11) & (exchcd>=1 & exchcd<=3) 

* note: the latest nameenddt in the file is taken to be the end of the CRSP sample.
* Name records ending on that date are extended to 3/31 of the following year so
* that IBES dates shortly after the sample end can still be matched.
* The cutoff is read from the data rather than hard-coded: a fixed date goes stale
* with each new CRSP vintage and would then extend records that really ended then.
* NOTE(review): the link rows were built against the unextended date ranges, so
* this extension may not add any rows at present - confirm whether it is needed.
summarize nameenddt, meanonly
local lastend = r(max)
replace nameenddt=mdy(3,31,year(`lastend')+1) if nameenddt==`lastend'

rename ticker tickercrsp
* for variables present in both files, joinby keeps the values from the data in memory (CRSP)
joinby permno permco using "`link'"
keep if (statpers>=namedt) & (statpers<=nameenddt) 
count

format nameenddt namedt st_date end_date %td
save $Work/link_ibes_crsp2024, replace
/* ********************************************************************* */
* Delete every regular file under $Temp (shell command; needs a Unix-style find).
* The path is quoted so a project directory containing spaces is passed to find
* as a single argument, and nothing is deleted when the project root is empty,
* in which case $Temp would point directly under the filesystem root.
if `"$dir"' != "" {
	!find "$Temp" -type f -delete
}
